suppressPackageStartupMessages(library(HoneyBADGER))
suppressPackageStartupMessages(library(Rsamtools))
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(ggplot2))
load("~/OneDrive - OnTheHub - The University of Oxford/Project scRNAseq analysis/LOH/R/20180712P11528.RData")
table(cov.sc["chr17:7577515-7577515",] >0, r["chr17:7577515-7577515",] >0)
##
## FALSE TRUE
## FALSE 194 0
## TRUE 13 17
For somatic mutation chr17:7577515-7577515 in tumour: * 194 cells do not have coverage * 17 cells only have mutated allele * 13 cells only have reference allele
which(sapply(NR$`11528_Blood_Exm`,length) == 2)
## chr6:57398226_T/A
## 2579
NR[2579,] <- c(30,119)
NV[2579,1:2] <- c(12,31)
blood <- unlist(NV[,1])/unlist(NR[,1])
tumour <- unlist(NV[,2])/unlist(NR[,2])
source("../../WES_analysis/R/Ahmed_functions.R")
df <- data.frame(chr = NV$chr,
pos = NV$pos,
vaf = tumour)
df <- replaceChroms(df,1)
df <- df[order( as.numeric(as.character(df[,2]))),]
df <- df[order(df[,1]),]
plot(df[,3], cex= 0.5, xaxt="n", xlab="", pch=20, las=1, col=rgb(0,0,0,alpha=0.3) , ylab = "VAF of 11528 tumour")
plotChroms(df, 1, 0.5, "blue")
In tumour sample, we can see highly confident LOH on Chromosome 2, 13, 15, 17 and 22. It also suggests that there is LOH on TP53.
df <- data.frame(chr = NV$chr,
pos = NV$pos,
vaf = blood)
df <- replaceChroms(df,1)
df <- df[order( as.numeric(as.character(df[,2]))),]
df <- df[order(df[,1]),]
plot(df[,3], cex= 0.5, xaxt="n", xlab="", pch=20, las=1, col=rgb(0,0,0,alpha=0.3) , ylab = "VAF of 11528 blood")
plotChroms(df, 1, 0.5, "blue")
df <- cbind(NV, NR)
colnames(df)[1:2] <- c("Blood_NV", "Tumour_NV")
colnames(df)[6:7] <- c("Blood_NR", "Tumour_NR")
df$tumour_vaf <- unlist(df$Tumour_NV)/unlist(df$Tumour_NR)
df$blood_vaf <- unlist(df$Blood_NV)/unlist(df$Blood_NR)
# filter out homozygous
df <- df[df$blood_vaf < 0.7 & df$blood_vaf > 0.3 ,]#& (df$tumour_vaf > 0.6 | df$tumour_vaf < 0.4),]
df <- na.omit(df)
chr <- rep(as.character(df$chr), 2)
pos <- as.numeric(rep(as.character(df$pos), 2))
df2 <- data.frame(chr = chr,
pos = pos,
vaf = c(unlist(df$tumour_vaf),unlist(df$blood_vaf)),
nr = c(unlist(df$Tumour_NR),unlist(df$Blood_NR)),
sample = c(rep("Tumour",nrow(df)), rep("Blood",nrow(df))))
df2$chr <- factor(df2$chr, levels = paste("chr", 1:22, sep = ""))
df2$vaf[df2$vaf < 0.6 & df2$vaf > 0.4] <- 0.5
# df2$vaf <- 0.5-abs(df2$vaf - 0.5)
df2$vaf[df2$vaf > 0.8] <- 1
df2$vaf[df2$vaf < 0.2] <- 0
head(df2)
for(i in unique(df2$chr))
{
df2$pos[df2$chr == i] <- (df2$pos[df2$chr == i]-min(df2$pos[df2$chr == i]))/(max(df2$pos[df2$chr == i])-min(df2$pos[df2$chr == i]))
}
ggplot(df2[!is.nan(df2$vaf),], aes(x = pos, y = sample)) +
geom_point(aes(col = vaf, size = nr)) +
facet_wrap( ~ chr , ncol = 22)+
ggplot2::scale_size_continuous(range = c(0, 6))+
scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5)+
ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(),panel.background = ggplot2::element_blank(),
axis.text.x=ggplot2::element_blank(),axis.title.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank(),legend.position="none",
panel.border = ggplot2::element_rect(fill = NA, linetype = "solid", colour = "black"),plot.title = ggplot2::element_text(hjust = 0.5) )
ggplot(df2[df2$chr %in% c("chr2","chr6","chr13","chr15","chr17","chr22") & (!is.nan(df2$vaf)),], aes(x = pos, y = sample)) +
geom_point(aes(col = vaf, size = nr)) +
facet_wrap( ~ chr , ncol = 22)+
ggplot2::scale_size_continuous(range = c(0, 10))+
scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5)+
ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(),panel.background = ggplot2::element_blank(),axis.text.x=ggplot2::element_blank(),axis.title.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank(),
legend.position="bottom",panel.border = ggplot2::element_rect(fill = NA, linetype = "solid", colour = "black"),plot.title = ggplot2::element_text(hjust = 0.5))
ggplot(df2[df2$chr %in% c("chr2") & (!is.nan(df2$vaf)),], aes(x = pos, y = sample)) +
geom_point(aes(col = vaf, size = nr)) +
facet_wrap( ~ chr , ncol = 22)+
ggplot2::scale_size_continuous(range = c(0, 15))+
scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5)+
ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(),panel.background = ggplot2::element_blank(),
axis.text.x=ggplot2::element_blank(),axis.title.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank(),legend.position="bottom",
panel.border = ggplot2::element_rect(fill = NA, linetype = "solid", colour = "black"),
plot.title = ggplot2::element_text(hjust = 0.5))
ggplot(df2[df2$chr %in% c("chr6") & (!is.nan(df2$vaf)),], aes(x = pos, y = sample)) +
geom_point(aes(col = vaf, size = nr)) +
facet_wrap( ~ chr , ncol = 22)+
ggplot2::scale_size_continuous(range = c(0, 15))+
scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5)+
ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(),panel.background = ggplot2::element_blank(),
axis.text.x=ggplot2::element_blank(),axis.title.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank(),legend.position="bottom",
panel.border = ggplot2::element_rect(fill = NA, linetype = "solid", colour = "black"),
plot.title = ggplot2::element_text(hjust = 0.5))
For these cells, we can see lose of chr6q, chr13, chr15, chr 17 and chr 22.
library(TxDb.Hsapiens.UCSC.hg19.knownGene) ## in order to map SNPs to genes
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
hb <- new('HoneyBADGER', name='P11528')
hb$setAlleleMats(r.init=r[,r["chr17:7577515-7577515",] > 0],
n.sc.init=cov.sc[, r["chr17:7577515-7577515",] > 0],
filter = F,
het.deviance.threshold=0.1, n.cores = 4) # 17 cells with mutated TP53 > 0
## Initializing allele matrices ...
## Creating in-silico bulk ...
## using 17 cells ...
## Setting composite lesser allele count ...
## Done setting initial allele matrices!
hb$setGeneFactors(txdb) ## map SNPs to genes
## Mapping snps to genes ...
## >> preparing features information... 2018-07-12 14:27:33
## >> identifying nearest features... 2018-07-12 14:27:34
## >> calculating distance from peak to TSS... 2018-07-12 14:27:34
## >> assigning genomic annotation... 2018-07-12 14:27:34
## >> assigning chromosome lengths 2018-07-12 14:27:54
## >> done... 2018-07-12 14:27:54
## Done mapping snps to genes!
hb$plotAlleleProfile() ## visualize individual SNPs
## Loading required package: reshape2
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
hb$plotAlleleProfile(chr = c("chr2","chr6","chr13","chr15","chr17","chr22")) ## visualize individual SNPs
The cells without mutation do not have obvious CNVs on chromosomal level.
hb2 <- new('HoneyBADGER', name='P11528')
hb2$setAlleleMats(r.init=r[,cov.sc["chr17:7577515-7577515",] > 0 & r["chr17:7577515-7577515",] == 0],
n.sc.init=cov.sc[,cov.sc["chr17:7577515-7577515",] > 0 & r["chr17:7577515-7577515",] == 0], filter = F,
het.deviance.threshold=0.1, n.cores = 4)
## Initializing allele matrices ...
## Creating in-silico bulk ...
## using 13 cells ...
## Setting composite lesser allele count ...
## Done setting initial allele matrices!
hb2$setGeneFactors(txdb) ## map SNPs to genes
## Mapping snps to genes ...
## >> preparing features information... 2018-07-12 14:27:59
## >> identifying nearest features... 2018-07-12 14:27:59
## >> calculating distance from peak to TSS... 2018-07-12 14:27:59
## >> assigning genomic annotation... 2018-07-12 14:27:59
## >> assigning chromosome lengths 2018-07-12 14:28:01
## >> done... 2018-07-12 14:28:01
## Done mapping snps to genes!
hb2$plotAlleleProfile() ## visualize individual SNPs
hb2$plotAlleleProfile(chr = c("chr2","chr6","chr13","chr15","chr17","chr22")) ## visualize individual SNPs
hb3 <- new('HoneyBADGER', name='P11528')
hb3$setAlleleMats(r.init=r[,cov.sc["chr17:7577515-7577515",] == 0],
n.sc.init=cov.sc[,cov.sc["chr17:7577515-7577515",] == 0], filter = F,
het.deviance.threshold=0.1, n.cores = 4)
## Initializing allele matrices ...
## Creating in-silico bulk ...
## using 194 cells ...
## Setting composite lesser allele count ...
## Done setting initial allele matrices!
# 4536 heterozygous SNPs identified
hb3$setGeneFactors(txdb) ## map SNPs to genes
## Mapping snps to genes ...
## >> preparing features information... 2018-07-12 14:28:09
## >> identifying nearest features... 2018-07-12 14:28:09
## >> calculating distance from peak to TSS... 2018-07-12 14:28:09
## >> assigning genomic annotation... 2018-07-12 14:28:09
## >> assigning chromosome lengths 2018-07-12 14:28:11
## >> done... 2018-07-12 14:28:11
## Done mapping snps to genes!
hb3$plotAlleleProfile() ## visualize individual SNPs
hb3$plotAlleleProfile(chr = c("chr2","chr6","chr13","chr15","chr17","chr22")) ## visualize individual SNPs
sce11528 <- readRDS("../expr/p11528_FTE_SingleCellExp_object.rds")
dim(sce11528)
## [1] 22110 258
sce11528 <- sce11528[,sce11528$Sample%in% cellinfo$Sample]
scater::plotTSNE(sce11528, colour_by = "initial.cluster")
These cells are filtered to be EpCAM+ epithelium cells
# logcounts(sce) <- log1p(calculateCPM(sce))
scater::plotTSNE(sce11528, colour_by = "KRT7", size_by = "CAPS")
sce11528$p53mut.cov <- cov.sc["chr17:7577515-7577515",]
sce11528$p53mut.nv <- r["chr17:7577515-7577515",]
Some cells in cluster 11528L.2 and 11528L.3 have p53 mutations.
table(sce11528$p53mut.nv > 0,sce11528$p53mut.cov > 0, sce11528$initial.cluster)
## , , = 11528L.1
##
##
## FALSE TRUE
## FALSE 3 0
## TRUE 0 0
##
## , , = 11528L.2
##
##
## FALSE TRUE
## FALSE 31 0
## TRUE 0 14
##
## , , = 11528L.3
##
##
## FALSE TRUE
## FALSE 51 1
## TRUE 0 3
##
## , , = 11528L.4
##
##
## FALSE TRUE
## FALSE 94 10
## TRUE 0 0
##
## , , = 11528L.5
##
##
## FALSE TRUE
## FALSE 15 2
## TRUE 0 0
sce11528$tp53_mut <- "no_cov"
sce11528$tp53_mut[sce11528$p53mut.nv > 0 ] <- "mut"
sce11528$tp53_mut[sce11528$p53mut.nv == 0 & sce11528$p53mut.cov > 0] <- "no-mut"
plotTSNE(sce11528[, sce11528$tp53_mut!= "no_cov"], colour_by = "tp53_mut", shape_by = "initial.cluster")
plotTSNE(sce11528, colour_by = "p53mut.nv", size_by = "p53mut.cov")
plotTSNE(sce11528[,sce11528$initial.cluster %in% c("11528L.2","11528L.3")], colour_by = "CUL7", size_by = "p53mut.nv", shape_by = "initial.cluster")
require(biomaRt) ## for gene coordinates
## Loading required package: biomaRt
mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org")
ft <- logcounts(sce11528)
# ref <- log1p(rowMeans(counts(sce11528)[,sce11528$initial.cluster == "11528L.4"])/1e6)
ref <- rowMeans(counts(sce11528) [,sce11528$initial.cluster %in% c("11528L.4","11528L.3")])
ref <- log1p(ref/sum(ref) * 1e5)
keep <- rowSums(ft > 1) > 20
ref <- ref[keep]
ft <- ft[keep,]
# gexp.mats <- setGexpMats(ft, refFT, mart.obj, filter=FALSE, scale=T)
hb$setGexpMats(ft[,sce11528$p53mut.nv > 0], ref, mart.obj, filter=F, scale=T, verbose=F)
hb$plotGexpProfile(zlim = c(-1.2,1.2), window.size = 201)
They are LOH on chr13, 15, 17 and 22.
hb$plotAlleleProfile()
hb$plotAlleleProfile(chrs = "chr6")
colnames(ft[,sce11528$p53mut.nv > 0])
## [1] "11528L-P2-H03" "11528L-P3-B02" "11528L-P4-E07" "11528L-P4-C02"
## [5] "11528L-p1-L23" "11528L-p1-N21" "11528L-p2-A03" "11528L-p2-B01"
## [9] "11528L-p2-N02" "11528L-p2-O04" "11528L-p1-C16" "11528L-p1-C03"
## [13] "11528L-p1-E24" "11528L-p1-J05" "11528L-p1-K13" "11528L-p1-N08"
## [17] "11528L-p1-O08"
These cells may be misclassified by clustering algorithm.
sce11528$Sample[sce11528$p53mut.nv > 0 & sce11528$initial.cluster == "11528L.3"]
## [1] "11528L-p2-O04" "11528L-p1-C16" "11528L-p1-N08"
hb4 <- new('HoneyBADGER', name='P11528')
hb4$setGexpMats(ft[,c("11528L-p2-O04","11528L-p1-C16","11528L-p1-N08")], ref, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 3 cells ...
## Done setting initial expression matrices!
hb4$plotGexpProfile(zlim = c(-1.5,1.5), window.size = 201)
hb4$setAlleleMats(r.init=r[,c("11528L-p2-O04","11528L-p1-C16","11528L-p1-N08")],
n.sc.init=cov.sc[, c("11528L-p2-O04","11528L-p1-C16","11528L-p1-N08")],
filter = F,
het.deviance.threshold=0.1, n.cores = 4) # 17 cells with mutated TP53 > 0
## Initializing allele matrices ...
## Creating in-silico bulk ...
## using 3 cells ...
## Setting composite lesser allele count ...
## Done setting initial allele matrices!
hb4$setGeneFactors(txdb) ## map SNPs to genes
## Mapping snps to genes ...
## >> preparing features information... 2018-07-12 14:28:52
## >> identifying nearest features... 2018-07-12 14:28:52
## >> calculating distance from peak to TSS... 2018-07-12 14:28:52
## >> assigning genomic annotation... 2018-07-12 14:28:52
## >> assigning chromosome lengths 2018-07-12 14:28:54
## >> done... 2018-07-12 14:28:54
## Done mapping snps to genes!
hb4$plotAlleleProfile() ## visualize individual SNPs
sce11528$initial.cluster[sce11528$tp53_mut == "no-mut"]
## [1] "11528L.4" "11528L.4" "11528L.5" "11528L.3" "11528L.4" "11528L.4"
## [7] "11528L.4" "11528L.4" "11528L.4" "11528L.4" "11528L.5" "11528L.4"
## [13] "11528L.4"
hb2$setGexpMats(ft[,sce11528$tp53_mut == "no-mut"], ref, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 13 cells ...
## Done setting initial expression matrices!
hb2$plotGexpProfile(zlim = c(-1.5,1.5), window.size = 201)
hb3$setGexpMats(ft[,sce11528$tp53_mut == "no_cov"], ref, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 194 cells ...
## Done setting initial expression matrices!
hb3$plotGexpProfile(zlim = c(-1.5,1.5), window.size = 201)
hb5 <- new('HoneyBADGER', name='P11528')
hb5$setGexpMats(ft[,sce11528$tp53_mut == "no_cov"&sce11528$initial.cluster == "11528L.2"], ref, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 31 cells ...
## Done setting initial expression matrices!
hb5$plotGexpProfile(zlim = c(-1.2,1.2), window.size = 201)
hb5 <- new('HoneyBADGER', name='P11528')
hb5$setGexpMats(ft[,sce11528$tp53_mut == "no_cov"&sce11528$initial.cluster == "11528L.3"], ref, mart.obj, filter=F, scale=T, verbose=F)
hb5$plotGexpProfile(zlim = c(-1.2,1.2), window.size = 201)
gexp.mats <- setGexpMats(ft, ref, mart.obj, filter=FALSE, scale=T)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 224 cells ...
## Done setting initial expression matrices!
gexp.plot.all <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes,window.size = 201,zlim = c(-1.2, 1.2))
An example with chr6p amp
plot(1:396,gexp.plot.all$tlsmooth[[6]][,"11528L-p1-O08"])
An example with no chr6p amp
plot(1:396,gexp.plot.all$tlsmooth[[6]][,"11528L-P1-C01"])
100 = amplified
0 = no amplified
hist(colSums(gexp.plot.all$tlsmooth[[6]][100:200,] > 0.1),30)
48 cells with strong amp signal at chr6p:
gexp.mats <- setGexpMats(ft[,names(which(colSums(gexp.plot.all$tlsmooth[[6]][100:200,] > 0.1) > 50))], ref, mart.obj, filter=FALSE, scale=T)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 48 cells ...
## Done setting initial expression matrices!
gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes,window.size = 201,zlim = c(-1.2, 1.2))
161 cells without signals:
gexp.mats <- setGexpMats(ft[,names(which(colSums(gexp.plot.all$tlsmooth[[6]][100:200,]> 0.1) < 10))], ref, mart.obj, filter=FALSE, scale=T)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 161 cells ...
## Done setting initial expression matrices!
gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes,window.size = 201,zlim = c(-1.2, 1.2))
15 cells with dodge signals
gexp.mats <- setGexpMats(ft[,names(which(colSums(gexp.plot.all$tlsmooth[[6]][100:200,] > 0.1) >= 10 & colSums(gexp.plot.all$tlsmooth[[6]][100:200,] > 0.1) <=50))], ref, mart.obj, filter=FALSE, scale=T)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8465 genes and 15 cells ...
## Done setting initial expression matrices!
gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes,window.size = 201,zlim = c(-1.2, 1.2))
sce11528$chr6pamp <- colSums(gexp.plot.all$tlsmooth[[6]][100:200,] > 0.1)
table(sce11528$chr6pamp, sce11528$initial.cluster)
##
## 11528L.1 11528L.2 11528L.3 11528L.4 11528L.5
## 0 2 1 31 104 14
## 2 1 1 0 0 1
## 3 0 0 1 0 0
## 4 0 0 1 0 0
## 5 0 2 0 0 0
## 7 0 0 1 0 0
## 8 0 0 1 0 0
## 11 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 1 2 0 0
## 16 0 1 0 0 0
## 17 0 0 1 0 0
## 19 0 0 0 0 1
## 23 0 1 0 0 0
## 35 0 1 0 0 0
## 37 0 2 0 0 0
## 38 0 1 1 0 0
## 40 0 0 1 0 0
## 57 0 0 1 0 0
## 59 0 0 1 0 0
## 66 0 1 0 0 0
## 78 0 0 0 0 1
## 80 0 1 0 0 0
## 86 0 1 0 0 0
## 94 0 0 1 0 0
## 97 0 1 0 0 0
## 100 0 0 1 0 0
## 101 0 30 9 0 0
scater::plotTSNE(sce11528, colour_by = "chr6pamp", shape_by = "tp53_mut")
# jpeg(filename = "../plots/20180704HoneyBadger11528/tp53mut_ampChr6_11528FTE.jpg", res = 150,width = 10, height = 8, units = "in")
# scater::plotTSNE(sce11528, colour_by = "chr6pamp", shape_by = "tp53_mut")
# dev.off()
scater::plotPCA(sce11528, colour_by = "chr6pamp", shape_by = "tp53_mut")
# jpeg(filename = "../plots/20180704HoneyBadger11528/tp53mut_ampChr6_11528FTE2.jpg", res = 150,width = 10, height = 8, units = "in")
# scater::plotTSNE(sce11528, size_by = "chr6pamp", colour_by = "tp53_mut")
# dev.off()
ggplot(data.frame(sce11528@colData), aes(x = initial.cluster, y = chr6pamp)) + geom_point(aes(col = tp53_mut),position=position_jitterdodge()) + theme_classic()